Library

library(rstan)
library(dplyr)
library(EBImage)
library(ggplot2)
library(spatstat)
library(randomcoloR)
library(SpatialExperiment)

Dataset

Load the TNBC SPE object.

load(here::here("Output", "Data", "03_SPE", "03_TNBC_2018_spe.rds"))
spe
## class: SpatialExperiment 
## dim: 36 179194 
## metadata(0):
## assays(1): exprs
## rownames(36): betaCatenin CD11b ... SMA Vimentin
## rowData names(0):
## colnames(179194): Cell_1 Cell_2 ... Cell_179193 Cell_179194
## colData names(26): sample_id patient_id ... Si Ta
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
## spatialCoords names(2) : centroidX centroidY
## imgData names(1): sample_id

Posterior Sampling

Load posterior sampling results of 5 topics using 4 chains and 2000 iterations on the dataset.

source(here::here("Notebooks", 
                  "06_LDA_scripts", "06_LDA_analysis.R"))

TNBC_2018_LDA_5_2000 <- LDA_analysis(
  spe = spe,
  SampleID_name = "sample_id",
  cellType_name = "mm",
  K = 5,
  alpha = 0.8,
  gamma = 0.8,
  iter = 2000,
  chain = 4,
  col_names_theta_all = c("iteration", "Sample", "Topic", "topic.dis"),
  col_names_beta_hat = c("iterations", "Topic", "Cell.Type", "beta_h"),
  stan_file_path = here::here("Notebooks", "06_LDA_scripts", "06_lda.stan"),
  load_file = TRUE,
  save_file = FALSE,
  file_default = TRUE,
  file_fold = "07_LDA_multiChains"
)
##    user  system elapsed 
##  20.954   2.909  24.105

Cell Type Proportion in each Topics

beta_hat <- TNBC_2018_LDA_5_2000$beta_aligned
dim(beta_hat)
## [1] 4000    5   16
dimnames(beta_hat)
## [[1]]
## NULL
## 
## [[2]]
## [1] "Topic_4" "Topic_5" "Topic_3" "Topic_2" "Topic_1"
## 
## [[3]]
##  [1] "B"            "CD3 T"        "CD4 T"        "CD8 T"        "DC"          
##  [6] "DC/Mono"      "Endothelial"  "Epithelial"   "Mac"          "Mesenchymal" 
## [11] "Mono/Neu"     "Neu"          "NK"           "Other"        "Other immune"
## [16] "T reg"
apply(data.frame(beta_hat[, 1, ]), 2, median)
##            B        CD3.T        CD4.T        CD8.T           DC      DC.Mono 
## 4.207629e-05 1.084241e-05 2.314173e-05 5.787219e-05 1.436342e-05 1.645184e-05 
##  Endothelial   Epithelial          Mac  Mesenchymal     Mono.Neu          Neu 
## 3.092605e-05 1.706643e-03 1.475518e-03 3.422458e-02 6.935411e-03 1.237185e-02 
##           NK        Other Other.immune        T.reg 
## 2.936232e-05 9.380175e-01 4.630195e-03 2.262238e-05
cellType_propInPercentage <- function(beta_hat) {
  
  K <- dim(beta_hat)[2]
  cellType_uni <- dim(beta_hat)[3]
  
  cellType_prop_df <- (
    apply(data.frame(beta_hat[, 1, ]), 2, median)
    |> as.data.frame()
  )
  
  for (i in (2:K)){
    cellType_prop <- (
      apply(data.frame(beta_hat[, i, ]), 2, median)
    |> as.data.frame()
    )
    cellType_prop_df <- cbind(cellType_prop_df, cellType_prop)
  }
  
  colnames(cellType_prop_df) <- dimnames(beta_hat)[[2]]
  rownames(cellType_prop_df) <- NULL
  cellType_prop_df <- cbind("cell_type" = dimnames(beta_hat)[[3]],
                            cellType_prop_df)
  
  #cellType_prop_df[ , sort(names(cellType_prop_df))]
  
  return(cellType_prop_df)
}
#abind::abind(beta_chain[[2]],beta_aligned,along = 1) |> View()
cellType_propInPercentage(beta_hat)
##       cell_type      Topic_4      Topic_5      Topic_3      Topic_2
## 1             B 4.207629e-05 8.513105e-05 1.065844e-04 0.6536047609
## 2         CD3 T 1.084241e-05 2.406102e-05 2.767426e-05 0.0168597504
## 3         CD4 T 2.314173e-05 9.058766e-05 7.922252e-05 0.1727531895
## 4         CD8 T 5.787219e-05 4.394182e-04 5.790226e-04 0.0509665789
## 5            DC 1.436342e-05 1.323498e-04 4.156588e-02 0.0206189980
## 6       DC/Mono 1.645184e-05 1.412231e-04 7.398791e-05 0.0174697485
## 7   Endothelial 3.092605e-05 9.265385e-04 1.755651e-03 0.0004695914
## 8    Epithelial 1.706643e-03 9.129625e-01 5.590630e-03 0.0050567299
## 9           Mac 1.475518e-03 4.694115e-03 1.574820e-03 0.0013981865
## 10  Mesenchymal 3.422458e-02 4.613585e-02 8.219965e-01 0.0163997653
## 11     Mono/Neu 6.935411e-03 1.743547e-03 6.793407e-05 0.0009068542
## 12          Neu 1.237185e-02 2.006123e-03 8.916236e-05 0.0128846446
## 13           NK 2.936232e-05 4.101593e-05 1.373928e-03 0.0016536583
## 14        Other 9.380175e-01 2.702834e-02 1.098227e-01 0.0034600699
## 15 Other immune 4.630195e-03 2.017539e-03 1.398154e-02 0.0196668789
## 16        T reg 2.262238e-05 7.528607e-04 7.915677e-05 0.0057425385
##         Topic_1
## 1  0.0005620689
## 2  0.0193821302
## 3  0.1506427035
## 4  0.3341671042
## 5  0.0177982019
## 6  0.0227078273
## 7  0.0051230636
## 8  0.0077643474
## 9  0.1455295420
## 10 0.0042322536
## 11 0.0103942284
## 12 0.0079516268
## 13 0.0055904435
## 14 0.0400597743
## 15 0.2067908578
## 16 0.0190299255

Spatial Compartment Dataframe

We want to construct a spatial compartment dataframe, where the rows are the cell identity. The columns of the dataframe will contain the cell type of the corresponding cell identity along with the proportion of that cell type in each topics for the number of topics \(K\).

spatial_compartment <- function(spe, beta_hat, cellTypeCol = "mm") {
  
  sample_id <- spe$sample_id
  
  #coord_xy <- spatialCoords(spe)
  
  cell_ct <- cbind("cell_id" = dimnames(assay(spe))[[2]], 
                   "cell_type" = spe$mm,
                   spatialCoords(spe)) |> data.frame()
  
  ct_prop <- cellType_propInPercentage(beta_hat)
  
  ## 1 for tumor cells, 0 for immune and non-immune cells
  tumor_not <- factor(ifelse(cell_ct == "Other", 1, 0)[, 2])
  #print(tumor_not)
  
  result <- cbind(sample_id, 
                  left_join(cell_ct, ct_prop, by = "cell_type"),
                  tumor_not
  )
  
  return(result)
}
spa_compa <- spatial_compartment(spe, beta_hat, cellTypeCol = "mm")
head(spa_compa)
##   sample_id cell_id cell_type   centroidX   centroidY      Topic_4      Topic_5
## 1 Sample_01  Cell_1         B 1645.635132 6.729857922 4.207629e-05 8.513105e-05
## 2 Sample_01  Cell_2         B    1693.625 9.326086998 4.207629e-05 8.513105e-05
## 3 Sample_01  Cell_3         B 1813.877197  8.84837532 4.207629e-05 8.513105e-05
## 4 Sample_01  Cell_4         B 1797.039063 17.58687973 4.207629e-05 8.513105e-05
## 5 Sample_01  Cell_5         B 419.3009949 18.70149231 4.207629e-05 8.513105e-05
## 6 Sample_01  Cell_6         B 403.2113342 28.49077988 4.207629e-05 8.513105e-05
##        Topic_3   Topic_2      Topic_1 tumor_not
## 1 0.0001065844 0.6536048 0.0005620689         0
## 2 0.0001065844 0.6536048 0.0005620689         0
## 3 0.0001065844 0.6536048 0.0005620689         0
## 4 0.0001065844 0.6536048 0.0005620689         0
## 5 0.0001065844 0.6536048 0.0005620689         0
## 6 0.0001065844 0.6536048 0.0005620689         0

Patient 1

spe_p4 <- spe[ ,spe$sample_id == "Sample_04"]
centroid_x_p4 <- spatialCoords(spe_p4)[, 1]
centroid_y_p4 <- spatialCoords(spe_p4)[, 2]

#spa_compa_p4 <- spa_compa[spa_compa$sample_id == "Sample_01",]
spa_compa_p4 <- spatial_compartment(spe_p4, beta_hat, cellTypeCol = "mm")
#spa_compa_p4
# save(spa_compa_p4,
#      file = here::here("Output", "Data",
#                        "09_Tessellation", "spa_compa_p4.rds"))

Create spatstat object

# create ppp object with three marks columns
ppp_p4 <- ppp(x = centroid_x_p4, y = centroid_y_p4,
              window = owin(c(0,2048),c(0,2048)),
              marks = subset(spa_compa_p4, 
                        select = c(cell_type, Topic_1, tumor_not))
              )
ppp_p4_tumorNot <- subset(ppp_p4, tumor_not == 0, drop = FALSE)
ppp_p4_tumor <- subset(ppp_p4, tumor_not == 1, drop = FALSE)
plot(quadratcount(split(ppp_p4), nx = 12, ny = 12))

dir_tess_p4 <- dirichlet(ppp_p4_tumorNot)
plot(dir_tess_p4)

library(maptools)
## Loading required package: sp
## 
## Attaching package: 'sp'
## The following object is masked from 'package:IRanges':
## 
##     %over%
## Checking rgeos availability: FALSE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
dir_poly_p4 <- as(dir_tess_p4, "SpatialPolygons")
plot(dir_poly_p4)

cut_ppp_p4 <- cut(ppp_p4_tumor, dir_tess_p4, labels = TRUE)
plot(cut_ppp_p4)
## Warning in default.charmap(ntypes, chars): Too many types to display every type
## as a different character
## Warning: Only 10 out of 3795 symbols are shown in the symbol map

plot(quadratcount(cut_ppp_p4))

split_ppp_p4 <- split(ppp_p4_tumor, dir_tess_p4)
plot(split_ppp_p4[1:30], use.marks = FALSE)

plot(split_ppp_p4[31:60], use.marks = FALSE)

plot(split_ppp_p4[61:90], use.marks = FALSE)

plot(split_ppp_p4[91:120], use.marks = FALSE)

split_spa_point_p4 <- lapply(split_ppp_p4,
                             as.ppp,
                             W = owin(c(0,2048),c(0,2048))
) 
#split_spa_point_p4 <- do.call('rbind', split_spa_point_p4)
#split_spa_point_p4[[2]]$window
split_spa_point_p4 <- sapply(c(1:length(split_spa_point_p4)), function(k){
  split_spa_point_p4[[k]]$n
})
#split_spa_point_p4
dir_poly_p4$numTumor <- split_spa_point_p4

tessllation_space <- tmap::tm_shape(dir_poly_p4) +
  tmap::tm_polygons(
    col = "numTumor",
    title = "Number of \ntumors cells",
    style = "jenks",
    palette = "YlGn",
    n = 6
  ) +
  tmap::tm_layout(legend.outside = TRUE,
                  legend.outside.position = "right")

tessllation_space
## Warning: The projection of the shape object dir_poly_p4 is not known, while it
## seems to be projected.
## Warning: Current projection of shape dir_poly_p4 unknown and cannot be
## determined.

library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
## queen's distance
dir_poly_nb_p4 <- poly2nb(dir_poly_p4)
dir_poly_net_p4 <- nb2lines(dir_poly_nb_p4, coords = coordinates(dir_poly_p4))
## Warning in CRS(proj4string): CRS: projargs should not be NULL; set to NA
dir_poly_lw1_p4 <- nb2listw(dir_poly_nb_p4, zero.policy = TRUE, style = "W")
print.listw(dir_poly_lw1_p4, zero.policy = TRUE)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 3795 
## Number of nonzero links: 22380 
## Percentage nonzero weights: 0.1553948 
## Average number of links: 5.897233 
## 
## Weights style: W 
## Weights constants summary:
##      n       nn   S0       S1       S2
## W 3795 14402025 3795 1313.404 15379.62
## rook's distance
dir_poly_nb_rook_p4 <- poly2nb(dir_poly_p4, queen = FALSE)
dir_poly_net_rook_p4 <- nb2lines(dir_poly_nb_rook_p4, coords = coordinates(dir_poly_p4))
## Warning in CRS(proj4string): CRS: projargs should not be NULL; set to NA
dir_poly_lw1_rook_p4 <- nb2listw(dir_poly_nb_rook_p4, zero.policy = TRUE, style = "W")
print.listw(dir_poly_lw1_rook_p4, zero.policy = TRUE)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 3795 
## Number of nonzero links: 22380 
## Percentage nonzero weights: 0.1553948 
## Average number of links: 5.897233 
## 
## Weights style: W 
## Weights constants summary:
##      n       nn   S0       S1       S2
## W 3795 14402025 3795 1313.404 15379.62

global Moran’s I

mi_numTumor <- moran.test(dir_poly_p4$numTumor, dir_poly_lw1_p4, zero.policy = TRUE)
mi_numTumor
## 
##  Moran I test under randomisation
## 
## data:  dir_poly_p4$numTumor  
## weights: dir_poly_lw1_p4    
## 
## Moran I statistic standard deviate = 61.107, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      5.787187e-01     -2.635741e-04      8.977257e-05

local Moran’s I

lmi_numTumor_queen <- localmoran(dir_poly_p4$numTumor,
                                 listw = dir_poly_lw1_p4)

dir_poly_p4$Ii <- lmi_numTumor_queen[, 1]
dir_poly_p4$E.Ii <- lmi_numTumor_queen[, 2]
dir_poly_p4$Var.Ii <- lmi_numTumor_queen[, 3]
dir_poly_p4$Z.Ii <- lmi_numTumor_queen[, 4]
dir_poly_p4$P <- lmi_numTumor_queen[, 5]
map_poly_p4_queen <- tmap::tm_shape(dir_poly_p4) + 
  tmap::tm_polygons(col = "Z.Ii", 
              title = "Local Moran's I", 
              style = "fixed",
              breaks = c(-Inf, -1.96, 1.96, Inf),
              labels = c("Negative SAC", "Random SAC", "Positive SAC"),
              palette = "RdBu", n = 3,
              midpoint = NA,
              border.alpha = 0.3) +
  tmap::tm_layout(legend.outside = TRUE,
                  legend.outside.position = "right")
map_poly_p4_queen
## Warning: The projection of the shape object dir_poly_p4 is not known, while it
## seems to be projected.
## Warning: Current projection of shape dir_poly_p4 unknown and cannot be
## determined.

# rook's distance
lmi_numTumor_rook <- localmoran(dir_poly_p4$numTumor,
                                 listw = dir_poly_lw1_rook_p4)

dir_poly_p4$Ii <- lmi_numTumor_rook[, 1]
dir_poly_p4$E.Ii <- lmi_numTumor_rook[, 2]
dir_poly_p4$Var.Ii <- lmi_numTumor_rook[, 3]
dir_poly_p4$Z.Ii <- lmi_numTumor_rook[, 4]
dir_poly_p4$P <- lmi_numTumor_rook[, 5]
map_poly_p4_rook <- tmap::tm_shape(dir_poly_p4) + 
  tmap::tm_polygons(col = "Z.Ii", 
              title = "Local Moran's I", 
              style = "fixed",
              breaks = c(-Inf, -1.96, 1.96, Inf),
              labels = c("Negative SAC", "Random SAC", "Positive SAC"),
              palette = "RdBu", n = 3,
              midpoint = NA,
              border.alpha = 0.3) 
map_poly_p4_rook
## Warning: The projection of the shape object dir_poly_p4 is not known, while it
## seems to be projected.
## Warning: Current projection of shape dir_poly_p4 unknown and cannot be
## determined.